From 456642c9091ce62e2920a44b0cda53dd841c8a4f Mon Sep 17 00:00:00 2001
From: "mwilli2@equilibrium.research.intel-research.net"
 <mwilli2@equilibrium.research.intel-research.net>
Date: Fri, 26 Mar 2004 18:32:29 +0000
Subject: [PATCH] bitkeeper revision 1.825.3.7 (4064773d4Vkaf0WFguSCpOO7O0qqEQ)

Add Atropos code and update control interface.
---
 .rootkeys                              |   1 +
 tools/xc/lib/xc.h                      |  20 +
 tools/xc/lib/xc_atropos.c              |  37 +-
 tools/xc/lib/xc_bvtsched.c             |  48 +-
 tools/xc/lib/xc_misc.c                 |  17 +
 tools/xc/lib/xc_rrobin.c               |  20 +-
 tools/xc/py/Xc.c                       | 150 ++++++-
 xen/common/dom0_ops.c                  |  11 +-
 xen/common/keyhandler.c                |  32 +-
 xen/common/sched_atropos.c             | 598 +++++++++++++++++++++++++
 xen/common/sched_bvt.c                 |  65 ++-
 xen/common/sched_rrobin.c              |  12 +-
 xen/common/schedule.c                  | 122 +++--
 xen/include/hypervisor-ifs/dom0_ops.h  |  14 +-
 xen/include/hypervisor-ifs/sched_ctl.h |  16 +-
 xen/include/xen/sched-if.h             |   4 +-
 xen/include/xen/sched.h                |   5 +
 17 files changed, 1041 insertions(+), 131 deletions(-)
 create mode 100644 xen/common/sched_atropos.c

diff --git a/.rootkeys b/.rootkeys
index 03b625759b..007f0440b5 100644
--- a/.rootkeys
+++ b/.rootkeys
@@ -175,6 +175,7 @@
 4051bcecFeq4DE70p4zGO5setf47CA xen/common/physdev.c
 4006e659i9j-doVxY7DKOGU4XVin1Q xen/common/rbtree.c
 3ddb79bdHqdQpATqC0rmUZNbsb6L6A xen/common/resource.c
+4064773cJ31vZt-zhbSoxqft1Jaw0w xen/common/sched_atropos.c
 40589968dD2D1aejwSOvrROg7fOvGQ xen/common/sched_bvt.c
 40589968be_t_n0-w6ggceW7h-sx0w xen/common/sched_rrobin.c
 3e397e6619PgAfBbw2XFbXkewvUWgw xen/common/schedule.c
diff --git a/tools/xc/lib/xc.h b/tools/xc/lib/xc.h
index 1045be69d0..0abd00989e 100644
--- a/tools/xc/lib/xc.h
+++ b/tools/xc/lib/xc.h
@@ -74,6 +74,7 @@ int xc_netbsd_build(int xc_handle,
 
 int xc_bvtsched_global_set(int xc_handle,
                            unsigned long ctx_allow);
+
 int xc_bvtsched_domain_set(int xc_handle,
                            u64 domid,
                            unsigned long mcuadv,
@@ -81,13 +82,32 @@ int xc_bvtsched_domain_set(int xc_handle,
                            unsigned long warpl,
                            unsigned long warpu);
 
+int xc_bvtsched_global_get(int xc_handle,
+			   unsigned long *ctx_allow);
+
+int xc_bvtsched_domain_get(int xc_handle,
+                           u64 domid,
+                           unsigned long *mcuadv,
+                           unsigned long *warp,
+                           unsigned long *warpl,
+                           unsigned long *warpu);
+
 int xc_atropos_domain_set(int xc_handle,
 			  u64 domid,
+			  u64 period, u64 slice, u64 latency,
 			  int xtratime);
 
+int xc_atropos_domain_get(int xc_handle,
+                          u64 domid,
+                          u64* period, u64 *slice, u64 *latency,
+                          int *xtratime);
+
 int xc_rrobin_global_set(int xc_handle,
 			 u64 slice);
 
+int xc_rrobin_global_get(int xc_handle,
+                         u64 *slice);
+
 typedef struct {
     unsigned long credit_bytes;
     unsigned long credit_usec;
diff --git a/tools/xc/lib/xc_atropos.c b/tools/xc/lib/xc_atropos.c
index 06ba01cf32..3b4535d96d 100644
--- a/tools/xc/lib/xc_atropos.c
+++ b/tools/xc/lib/xc_atropos.c
@@ -8,29 +8,44 @@
 
 #include "xc_private.h"
 
-int xc_atropos_global_set(int xc_handle,
-			  unsigned long ctx_allow)
+int xc_atropos_domain_set(int xc_handle,
+			  u64 domid, u64 period, u64 slice, u64 latency,
+                          int xtratime)
 {
     dom0_op_t op;
+    struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
 
-    op.cmd = DOM0_SCHEDCTL;
-    op.u.schedctl.sched_id = SCHED_BVT;
+    op.cmd = DOM0_ADJUSTDOM;
+    op.u.adjustdom.domain  = (domid_t)domid;
+    op.u.adjustdom.sched_id = SCHED_ATROPOS;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
 
-    op.u.schedctl.u.bvt.ctx_allow = ctx_allow;
+    p->period   = period;
+    p->slice    = slice;
+    p->latency  = latency;
+    p->xtratime = xtratime;
 
     return do_dom0_op(xc_handle, &op);
 }
 
-int xc_atropos_domain_set(int xc_handle,
-			  u64 domid, int xtratime)
+int xc_atropos_domain_get(int xc_handle, u64 domid, u64 *period,
+                          u64 *slice, u64 *latency, int *xtratime)
 {
     dom0_op_t op;
+    int ret;
+    struct atropos_adjdom *p = &op.u.adjustdom.u.atropos;
 
-    op.cmd = DOM0_ADJUSTDOM;
-    op.u.adjustdom.domain  = (domid_t)domid;
+    op.cmd = DOM0_ADJUSTDOM;    
+    op.u.adjustdom.domain = (domid_t)domid;
     op.u.adjustdom.sched_id = SCHED_ATROPOS;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
 
-    op.u.adjustdom.u.atropos.xtratime = xtratime;
+    ret = do_dom0_op(xc_handle, &op);
 
-    return do_dom0_op(xc_handle, &op);
+    if ( ret != 0 ) return ret; /* 'op' may hold no valid reply: don't read it */
+    *period   = p->period;
+    *slice    = p->slice;
+    *latency  = p->latency;
+    *xtratime = p->xtratime;
+    return 0;
 }
diff --git a/tools/xc/lib/xc_bvtsched.c b/tools/xc/lib/xc_bvtsched.c
index 428c2d6c32..e5106b561b 100644
--- a/tools/xc/lib/xc_bvtsched.c
+++ b/tools/xc/lib/xc_bvtsched.c
@@ -15,12 +15,29 @@ int xc_bvtsched_global_set(int xc_handle,
 
     op.cmd = DOM0_SCHEDCTL;
     op.u.schedctl.sched_id = SCHED_BVT;
-
+    op.u.schedctl.direction = SCHED_INFO_PUT;
     op.u.schedctl.u.bvt.ctx_allow = ctx_allow;
 
     return do_dom0_op(xc_handle, &op);
 }
 
+/* Read the BVT scheduler's global ctx_allow; written to only on success. */
+int xc_bvtsched_global_get(int xc_handle,
+                           unsigned long *ctx_allow)
+{
+    dom0_op_t op;
+    int ret;
+
+    op.cmd = DOM0_SCHEDCTL;
+    op.u.schedctl.sched_id = SCHED_BVT;
+    op.u.schedctl.direction = SCHED_INFO_GET;
+    /* If the call failed, 'op' holds no valid reply: leave *ctx_allow alone. */
+    if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
+        return ret;
+    *ctx_allow = op.u.schedctl.u.bvt.ctx_allow;
+    return 0;
+}
+
 int xc_bvtsched_domain_set(int xc_handle,
                            u64 domid,
                            unsigned long mcuadv,
@@ -34,11 +51,38 @@ int xc_bvtsched_domain_set(int xc_handle,
     op.cmd = DOM0_ADJUSTDOM;
     op.u.adjustdom.domain  = (domid_t)domid;
     op.u.adjustdom.sched_id = SCHED_BVT;
+    op.u.adjustdom.direction = SCHED_INFO_PUT;
 
     bvtadj->mcu_adv = mcuadv;
     bvtadj->warp    = warp;
     bvtadj->warpl   = warpl;
     bvtadj->warpu   = warpu;
-
     return do_dom0_op(xc_handle, &op);
 }
+
+
+/* Fetch a domain's BVT parameters; the outputs are written only on success. */
+int xc_bvtsched_domain_get(int xc_handle,
+                           u64 domid,
+                           unsigned long *mcuadv,
+                           unsigned long *warp,
+                           unsigned long *warpl,
+                           unsigned long *warpu)
+{
+    dom0_op_t op;
+    int ret;
+    struct bvt_adjdom *adjptr = &op.u.adjustdom.u.bvt;
+
+    op.cmd = DOM0_ADJUSTDOM;
+    op.u.adjustdom.domain  = (domid_t)domid;
+    op.u.adjustdom.sched_id = SCHED_BVT;
+    op.u.adjustdom.direction = SCHED_INFO_GET;
+
+    if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
+        return ret; /* 'op' holds no valid reply to copy out */
+    *mcuadv = adjptr->mcu_adv;
+    *warp   = adjptr->warp;
+    *warpl  = adjptr->warpl;
+    *warpu  = adjptr->warpu;
+    return 0;
+}
diff --git a/tools/xc/lib/xc_misc.c b/tools/xc/lib/xc_misc.c
index 15fcead97c..9f087d56fb 100644
--- a/tools/xc/lib/xc_misc.c
+++ b/tools/xc/lib/xc_misc.c
@@ -68,3 +68,20 @@ int xc_physinfo(int xc_handle,
     return 0;
 }
 
+
+/* Ask Xen which scheduler is active; *sched_id is set only on success. */
+int xc_sched_id(int xc_handle,
+                int *sched_id)
+{
+    dom0_op_t op;
+    int rc;
+
+    op.cmd = DOM0_SCHED_ID;
+    op.interface_version = DOM0_INTERFACE_VERSION;
+
+    if ( (rc = do_dom0_op(xc_handle, &op)) != 0 )
+        return rc;
+    *sched_id = op.u.sched_id.sched_id;
+    return 0;
+}
+
diff --git a/tools/xc/lib/xc_rrobin.c b/tools/xc/lib/xc_rrobin.c
index c915508050..ad37962f3b 100644
--- a/tools/xc/lib/xc_rrobin.c
+++ b/tools/xc/lib/xc_rrobin.c
@@ -11,11 +11,27 @@
 int xc_rrobin_global_set(int xc_handle, u64 slice)
 {
     dom0_op_t op;
-
     op.cmd = DOM0_SCHEDCTL;
     op.u.schedctl.sched_id = SCHED_RROBIN;
+    op.u.schedctl.direction = SCHED_INFO_PUT;
 
     op.u.schedctl.u.rrobin.slice = slice;
-
     return do_dom0_op(xc_handle, &op);
 }
+
+
+/* Read the round-robin scheduler's slice; *slice is written only on success. */
+int xc_rrobin_global_get(int xc_handle, u64 *slice)
+{
+    dom0_op_t op;
+    int ret;
+
+    op.cmd = DOM0_SCHEDCTL;
+    op.u.schedctl.sched_id = SCHED_RROBIN;
+    op.u.schedctl.direction = SCHED_INFO_GET;
+
+    if ( (ret = do_dom0_op(xc_handle, &op)) != 0 )
+        return ret; /* 'op' holds no valid reply to copy out */
+    *slice = op.u.schedctl.u.rrobin.slice;
+    return 0;
+}
diff --git a/tools/xc/py/Xc.c b/tools/xc/py/Xc.c
index 96b9bf491a..3cbbe7efa6 100644
--- a/tools/xc/py/Xc.c
+++ b/tools/xc/py/Xc.c
@@ -281,6 +281,23 @@ static PyObject *pyxc_bvtsched_global_set(PyObject *self,
     return zero;
 }
 
+/* Python binding: return the BVT global settings as { "ctx_allow": long }. */
+static PyObject *pyxc_bvtsched_global_get(PyObject *self,
+                                          PyObject *args,
+                                          PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    unsigned long ctx_allow;
+
+    if ( !PyArg_ParseTuple(args, "") )
+        return NULL;
+
+    if ( xc_bvtsched_global_get(xc->xc_handle, &ctx_allow) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+    /* The braces matter: without them Py_BuildValue returns a 2-tuple. */
+    return Py_BuildValue("{s:l}", "ctx_allow", ctx_allow);
+}
+
 static PyObject *pyxc_bvtsched_domain_set(PyObject *self,
                                           PyObject *args,
                                           PyObject *kwds)
@@ -305,6 +322,31 @@ static PyObject *pyxc_bvtsched_domain_set(PyObject *self,
     return zero;
 }
 
+/* Python binding: return a domain's BVT parameters as a dictionary. */
+static PyObject *pyxc_bvtsched_domain_get(PyObject *self,
+                                          PyObject *args,
+                                          PyObject *kwds)
+{
+    static char *kwd_list[] = { "dom", NULL };
+    XcObject *xc = (XcObject *)self;
+    unsigned long mcuadv, warp, warpl, warpu;
+    u64 dom;
+    int rc;
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &dom) )
+        return NULL;
+
+    rc = xc_bvtsched_domain_get(xc->xc_handle, dom,
+                                &mcuadv, &warp, &warpl, &warpu);
+    if ( rc != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:L,s:l,s:l,s:l,s:l}",
+                         "domain", dom,
+                         "mcuadv", mcuadv, "warp", warp,
+                         "warpl", warpl, "warpu", warpu);
+}
+
 static PyObject *pyxc_vif_scheduler_set(PyObject *self,
                                         PyObject *args,
                                         PyObject *kwds)
@@ -879,22 +921,52 @@ static PyObject *pyxc_atropos_domain_set(PyObject *self,
                                          PyObject *kwds)
 {
     XcObject *xc = (XcObject *)self;
-    int xtratime;
     u64 domid;
+    u64 period, slice, latency;
+    int xtratime;
 
-    static char *kwd_list[] = { "dom", "xtratime", NULL };
+    static char *kwd_list[] = { "dom", "period", "slice", "latency",
+				"xtratime", NULL };
     
-    if( !PyArg_ParseTupleAndKeywords(args, kwds, "Li", kwd_list, &domid,
-                                     &xtratime) )
+    if( !PyArg_ParseTupleAndKeywords(args, kwds, "LLLLi", kwd_list, &domid,
+                                     &period, &slice, &latency, &xtratime) )
         return NULL;
    
-    if ( xc_atropos_domain_set(xc->xc_handle, domid, xtratime) != 0 )
+    if ( xc_atropos_domain_set(xc->xc_handle, domid, period, slice,
+			       latency, xtratime) != 0 )
         return PyErr_SetFromErrno(xc_error);
 
     Py_INCREF(zero);
     return zero;
 }
 
+/* Python binding: return a domain's Atropos parameters as a dictionary. */
+static PyObject *pyxc_atropos_domain_get(PyObject *self,
+                                         PyObject *args,
+                                         PyObject *kwds)
+{
+    static char *kwd_list[] = { "dom", NULL };
+    XcObject *xc = (XcObject *)self;
+    u64 domid, period, slice, latency;
+    int xtratime, rc;
+
+    if ( !PyArg_ParseTupleAndKeywords(args, kwds, "L", kwd_list, &domid) )
+        return NULL;
+
+    rc = xc_atropos_domain_get(xc->xc_handle, domid, &period,
+                               &slice, &latency, &xtratime);
+    if ( rc != 0 )
+        return PyErr_SetFromErrno(xc_error);
+
+    return Py_BuildValue("{s:L,s:L,s:L,s:L,s:i}",
+                         "domain",   domid,
+                         "period",   period,
+                         "slice",    slice,
+                         "latency",  latency,
+                         "xtratime", xtratime);
+}
+
+
 static PyObject *pyxc_rrobin_global_set(PyObject *self,
                                         PyObject *args,
                                         PyObject *kwds)
@@ -914,6 +986,22 @@ static PyObject *pyxc_rrobin_global_set(PyObject *self,
     return zero;
 }
 
+/* Python binding: return the round-robin settings as { "slice": long }. */
+static PyObject *pyxc_rrobin_global_get(PyObject *self,
+                                        PyObject *args,
+                                        PyObject *kwds)
+{
+    XcObject *xc = (XcObject *)self;
+    u64 slice;
+
+    if ( !PyArg_ParseTuple(args, "") )
+        return NULL;
+    if ( xc_rrobin_global_get(xc->xc_handle, &slice) != 0 )
+        return PyErr_SetFromErrno(xc_error);
+    /* The braces matter: without them Py_BuildValue returns a 2-tuple. */
+    return Py_BuildValue("{s:L}", "slice", slice);
+}
+
 
 static PyMethodDef pyxc_methods[] = {
     { "domain_create", 
@@ -1015,6 +1103,13 @@ static PyMethodDef pyxc_methods[] = {
       " ctx_allow [int]: Minimal guaranteed quantum (I think!).\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    { "bvtsched_global_get",
+      (PyCFunction)pyxc_bvtsched_global_get,
+      METH_KEYWORDS, "\n"
+      "Get global tuning parameters for BVT scheduler.\n"
+      "Returns: [dict]:\n"
+      " ctx_allow [int]: context switch allowance\n" },
+
     { "bvtsched_domain_set",
       (PyCFunction)pyxc_bvtsched_domain_set,
       METH_VARARGS | METH_KEYWORDS, "\n"
@@ -1026,21 +1121,56 @@ static PyMethodDef pyxc_methods[] = {
       " warpu  [int]:  Internal BVT parameter.\n\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    { "bvtsched_domain_get",
+      (PyCFunction)pyxc_bvtsched_domain_get,
+      METH_KEYWORDS, "\n"
+      "Get per-domain tuning parameters under the BVT scheduler.\n"
+      " dom [long]: Identifier of domain to be queried.\n"
+      "Returns [dict]:\n"
+      " domain [long]: Domain ID.\n"
+      " mcuadv [long]: MCU Advance.\n"
+      " warp   [long]: Warp.\n"
+      " warpu  [long]:\n"
+      " warpl  [long]: Warp limit,\n"
+    },
+
     { "atropos_domain_set",
       (PyCFunction)pyxc_atropos_domain_set,
-      METH_VARARGS | METH_KEYWORDS, "\n"
-      "Set the extra time flag for a domain when running with Atropos.\n"
-      " dom [long]: domain to set\n"
+      METH_KEYWORDS, "\n"
+      "Set the scheduling parameters for a domain when running with Atropos.\n"
+      " dom      [long]: domain to set\n"
+      " period   [long]: domain's scheduling period\n"
+      " slice    [long]: domain's slice per period\n"
+      " latency  [long]: wakeup latency hint\n"
       " xtratime [int]: boolean\n"
       "Returns: [int] 0 on success; -1 on error.\n" },
 
+    { "atropos_domain_get",
+      (PyCFunction)pyxc_atropos_domain_get,
+      METH_KEYWORDS, "\n"
+      "Get the current scheduling parameters for a domain when running with\n"
+      "the Atropos scheduler.\n"
+      " dom      [long]: domain to query\n"
+      "Returns:  [dict]\n"
+      " domain   [long]: domain ID\n"
+      " period   [long]: scheduler period\n"
+      " slice    [long]: CPU reservation per period\n"
+      " latency  [long]: unblocking latency hint\n"
+      " xtratime [int] : 0 if not using slack time, nonzero otherwise\n" },
+
     { "rrobin_global_set",
       (PyCFunction)pyxc_rrobin_global_set,
       METH_KEYWORDS, "\n"
       "Set Round Robin scheduler slice.\n"
       " slice [long]: Round Robin scheduler slice\n"
-      "Returns: [int] 0 on success, throws an exception on failure\n"
-    },
+      "Returns: [int] 0 on success, throws an exception on failure\n" },
+
+    { "rrobin_global_get",
+      (PyCFunction)pyxc_rrobin_global_get,
+      METH_KEYWORDS, "\n"
+      "Get Round Robin scheduler settings\n"
+      "Returns [dict]:\n"
+      " slice  [long]: Scheduler time slice.\n" },    
 
     { "vif_scheduler_set", 
       (PyCFunction)pyxc_vif_scheduler_set, 
diff --git a/xen/common/dom0_ops.c b/xen/common/dom0_ops.c
index a5f7041f0b..4c1a3cdefd 100644
--- a/xen/common/dom0_ops.c
+++ b/xen/common/dom0_ops.c
@@ -200,12 +200,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
     case DOM0_SCHEDCTL:
     {
         ret = sched_ctl(&op->u.schedctl);
+        copy_to_user(u_dom0_op, op, sizeof(*op));
     }
     break;
 
     case DOM0_ADJUSTDOM:
     {
         ret = sched_adjdom(&op->u.adjustdom);
+        copy_to_user(u_dom0_op, op, sizeof(*op));
     }
     break;
 
@@ -275,7 +277,6 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
             if ( (p->state == TASK_STOPPED) || (p->state == TASK_DYING) )
                 op->u.getdomaininfo.state = DOMSTATE_STOPPED;
             op->u.getdomaininfo.hyp_events  = p->hyp_events;
-//            op->u.getdomaininfo.mcu_advance = p->mcu_advance;
             op->u.getdomaininfo.tot_pages   = p->tot_pages;
             op->u.getdomaininfo.cpu_time    = p->cpu_time;
             op->u.getdomaininfo.shared_info_frame = 
@@ -485,6 +486,14 @@ long do_dom0_op(dom0_op_t *u_dom0_op)
                                         op->u.pcidev_access.enable);
     }
     break;
+
+    case DOM0_SCHED_ID:
+    {
+        op->u.sched_id.sched_id = sched_id();
+        copy_to_user(u_dom0_op, op, sizeof(*op));
+        ret = 0;
+    }
+    break; /* without this we fall into 'default' and return -ENOSYS */
      
     default:
         ret = -ENOSYS;
diff --git a/xen/common/keyhandler.c b/xen/common/keyhandler.c
index e2eed7a85c..734df5cffa 100644
--- a/xen/common/keyhandler.c
+++ b/xen/common/keyhandler.c
@@ -4,6 +4,7 @@
 #include <xen/event.h>
 #include <xen/console.h>
 #include <xen/serial.h>
+#include <xen/sched.h>
 
 #define KEY_MAX 256
 #define STR_MAX  64
@@ -74,29 +75,6 @@ static void kill_dom0(u_char key, void *dev_id, struct pt_regs *regs)
     kill_other_domain(0, 0);
 }
 
-
-/* XXX SMH: this is keir's fault */
-static char *task_states[] = 
-{ 
-    "Runnable  ", 
-    "Int Sleep ", 
-    "UInt Sleep", 
-    NULL,
-    "Stopped   ", 
-    NULL,
-    NULL,
-    NULL,
-    "Dying     ",
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    NULL,
-    "Sched priv"
-}; 
-
 void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs) 
 {
     unsigned long       flags;
@@ -111,10 +89,10 @@ void do_task_queues(u_char key, void *dev_id, struct pt_regs *regs)
 
     for_each_domain ( p )
     {
-        printk("Xen: DOM %llu, CPU %d [has=%c], state = %s, "
-               "hyp_events = %08x\n", 
-               p->domain, p->processor, p->has_cpu ? 'T':'F', 
-               task_states[p->state], p->hyp_events); 
+        printk("Xen: DOM %llu, CPU %d [has=%c], state = ",
+               p->domain, p->processor, p->has_cpu ? 'T':'F'); 
+        sched_prn_state(p ->state);
+	printk(", hyp_events = %08x\n", p->hyp_events);
         s = p->shared_info; 
         printk("Guest: upcall_pend = %08lx, upcall_mask = %08lx\n", 
                s->evtchn_upcall_pending, s->evtchn_upcall_mask);
diff --git a/xen/common/sched_atropos.c b/xen/common/sched_atropos.c
new file mode 100644
index 0000000000..1a5fd792aa
--- /dev/null
+++ b/xen/common/sched_atropos.c
@@ -0,0 +1,598 @@
+/*
+ *	atropos.c
+ *	---------
+ *
+ * Copyright (c) 1994 University of Cambridge Computer Laboratory.
+ * This is part of Nemesis; consult your contract for terms and conditions.
+ *
+ * ID : $Id: atropos.c 1.1 Tue, 13 Apr 1999 13:30:49 +0100 dr10009 $
+ *
+ * This is the "atropos" CPU scheduler. 
+ */
+
+/* Ported to Xen's generic scheduler interface by Mark Williamson
+ * these modifications are (C) 2004 Intel Research Cambridge
+ */
+
+#include <xen/time.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <hypervisor-ifs/sched_ctl.h>
+#include <xen/trace.h>
+
+#define ATROPOS_TASK_UNBLOCKED 16 /* task state: wait-queued, recently unblocked */
+#define ATROPOS_TASK_WAIT      32 /* task state: wait-queued, no special status  */
+
+#define Activation_Reason_Allocated 1 /* prevddln is later than the last run  */
+#define Activation_Reason_Preempted 2 /* otherwise, or an UNBLOCKED waiter    */
+#define Activation_Reason_Extra     3 /* xtratime waiter given slack CPU time */
+
+/* Atropos-specific per-domain data, reached via task_struct.sched_priv */
+struct at_dom_info
+{
+    /* MAW Xen additions */
+    struct task_struct *owner; /* the struct task_struct this data belongs to */
+    struct list_head waitq;    /* link in the per-CPU wait queue              */
+    int reason;                /* reason domain was last scheduled            */
+
+    /* (what remains of) the original fields */
+
+    s_time_t     deadline;       /* Next deadline; both queues sort on it */
+    s_time_t     prevddln;       /* Previous deadline            */
+    
+    s_time_t     remain;         /* Time remaining this period (can be < 0) */
+    s_time_t     period;         /* Added to deadline at each new period */
+    s_time_t     slice;          /* Time credited to remain per period   */
+    s_time_t     latency;        /* Deadline offset used by unblock()    */
+
+    int          xtratime;       /* Nonzero: may be run in slack time */
+};
+
+
+struct at_cpu_info
+{
+    struct list_head waitq; /* head of this CPU's wait queue (at_dom_info.waitq links) */
+};
+
+
+#define DOM_INFO(_p) ( (struct at_dom_info *)((_p)->sched_priv) ) /* task pointer -> at_dom_info */
+#define CPU_INF(_p)  ( (struct at_cpu_info *)((_p).sched_priv) ) /* schedule_data entry -> at_cpu_info */
+#define WAITQ(cpu)   (&( CPU_INF(schedule_data[cpu]) )->waitq ) /* head of that CPU's wait queue */
+#define RUNQ(cpu)    (&schedule_data[cpu].runqueue) /* head of that CPU's run queue */
+
+#define BESTEFFORT_QUANTUM MILLISECS(5)
+
+/* SLAB cache for struct at_dom_info objects */
+static kmem_cache_t *dom_info_cache;
+
+/** q_len - count the entries linked on the list whose head is @q */
+static int q_len(struct list_head *q) 
+{
+    struct list_head *pos;
+    int count = 0;
+    for ( pos = q->next; pos != q; pos = pos->next ) count++;
+    return count;
+}
+
+
+/** waitq_el - map a wait queue link back to the task_struct that owns it */
+static inline struct task_struct * waitq_el(struct list_head *l)
+{
+    /* @l is the 'waitq' member embedded in an at_dom_info */
+    struct at_dom_info *dom_inf = list_entry(l, struct at_dom_info, waitq);
+    return dom_inf->owner;
+}
+
+
+/*
+ * requeue
+ *
+ * Places the specified domain on the queue selected by its state:
+ * ATROPOS_TASK_WAIT / ATROPOS_TASK_UNBLOCKED go on the per-CPU wait queue,
+ * TASK_RUNNING goes on the run queue; any other state is left unqueued.
+ * Both queues are kept sorted by ascending deadline, and on the run queue
+ * a domain is always inserted ahead of the idle task.
+ *
+ * Note that domains can be on the wait queue with remain > 0
+ * as a result of being blocked for a short time.
+ * These are scheduled in preference to domains with remain < 0
+ * in an attempt to improve interactive performance.
+ */
+static void requeue(struct task_struct *sdom)
+{
+    struct at_dom_info *inf = DOM_INFO(sdom);
+    struct list_head *prev = WAITQ(sdom->processor);
+    struct list_head *next;
+
+    if(sdom->state == ATROPOS_TASK_WAIT ||
+       sdom->state == ATROPOS_TASK_UNBLOCKED )
+    {
+        /* insert into the wait queue, ordered by deadline */
+
+        prev = WAITQ(sdom->processor); /* same value as the initialiser above */
+        list_for_each(next, WAITQ(sdom->processor))
+        {
+            struct at_dom_info *i = list_entry(next, struct at_dom_info, waitq);
+            if( i->deadline > inf->deadline ) /* strict: equal deadlines keep FIFO order */
+            {
+                __list_add(&inf->waitq, prev, next);
+                break;
+            }
+
+            prev = next;
+        }
+
+        /* the loop ran off the end without inserting (next is back at the
+         * list head), so the domain belongs at the tail */
+        if ( next == WAITQ(sdom->processor))
+            list_add_tail(&inf->waitq, WAITQ(sdom->processor));
+    }
+    else if(sdom->state == TASK_RUNNING)
+    {
+        /* insert into the run queue, ordered by deadline */
+        prev = RUNQ(sdom->processor);
+
+        list_for_each(next, RUNQ(sdom->processor))
+        {
+            struct task_struct *p = list_entry(next, struct task_struct,
+                                               run_list);
+
+            if( DOM_INFO(p)->deadline > inf->deadline || is_idle_task(p) )
+            {
+                __list_add(&sdom->run_list, prev, next);
+                break;
+            }
+
+            prev = next;
+        }
+
+        if ( next == RUNQ(sdom->processor) ) /* nothing later found: append */
+            list_add_tail(&sdom->run_list, RUNQ(sdom->processor));
+    }
+    /* silently ignore tasks in other states like BLOCKED, DYING, STOPPED, etc
+     * - they shouldn't be on any queue */
+}
+
+/* at_add_task - give a new task its initial Atropos parameters (no queueing here) */
+static void at_add_task(struct task_struct *p)
+{
+    s_time_t now = NOW();
+
+    ASSERT( p->sched_priv != NULL ); /* DOM_INFO(p) is dereferenced below */
+
+    DOM_INFO(p)->owner = p;
+    p->lastschd = now;
+ 
+    if(is_idle_task(p)) /* NOTE(review): both branches below reassign slice - confirm intent */
+      DOM_INFO(p)->slice = MILLISECS(5);
+
+    /* DOM 0's scheduling parameters must be set here in order for it to boot
+     * the system! */
+    if(p->domain == 0)
+    {
+        DOM_INFO(p)->remain = MILLISECS(15);
+        DOM_INFO(p)->period = MILLISECS(20);
+        DOM_INFO(p)->slice  = MILLISECS(15);
+        DOM_INFO(p)->latency = MILLISECS(10);
+        DOM_INFO(p)->xtratime = 1;
+        DOM_INFO(p)->deadline = now;
+        DOM_INFO(p)->prevddln = now;
+    }
+    else /* other domains run basically best effort unless otherwise set */
+    {
+        DOM_INFO(p)->remain = 0; /* no guaranteed time to start with */
+        DOM_INFO(p)->period = MILLISECS(10000);
+        DOM_INFO(p)->slice  = MILLISECS(10);
+        DOM_INFO(p)->latency = MILLISECS(10000);
+        DOM_INFO(p)->xtratime = 1;
+        DOM_INFO(p)->deadline = now + MILLISECS(10000); /* one period from now */
+        DOM_INFO(p)->prevddln = 0;
+    }
+
+    INIT_LIST_HEAD(&(DOM_INFO(p)->waitq)); /* self-linked: not on any wait queue */
+}
+
+
+/**
+ * dequeue - take a (non-idle) domain off the wait queue and the run queue.
+ * @sdom:    the task to remove
+ */
+static void dequeue(struct task_struct *sdom)
+{
+    struct at_dom_info *inf = DOM_INFO(sdom);
+
+    ASSERT(sdom->domain != IDLE_DOMAIN_ID);
+    
+    /* unlink from the wait queue, then re-initialise the link as empty */
+    list_del(&inf->waitq);
+    INIT_LIST_HEAD(&inf->waitq);
+    
+    if(__task_on_runqueue(sdom))
+        __del_from_runqueue(sdom);
+
+    sdom->run_list.next = NULL; /* presumably marks "not on the run queue" - TODO confirm */
+    sdom->run_list.prev = NULL;
+
+}
+
+
+/*
+ * unblock
+ *
+ * This function deals with updating the sdom for a domain
+ * which has just been unblocked: it picks the domain's new state
+ * (and, after a long block, a new deadline) and requeues it.
+ * ASSERT: On entry, the sdom has already been removed from the block
+ * queue (it can be done more efficiently if we know that it
+ * is on the head of the queue) but its deadline field has not been
+ * restored yet.
+ */
+static void unblock(struct task_struct *sdom)
+{
+    s_time_t time = NOW();
+    struct at_dom_info *inf = DOM_INFO(sdom);
+    
+    dequeue(sdom);
+
+    /* We distinguish two cases... short and long blocks */
+    if ( inf->deadline < time ) {
+	/* The sdom has passed its deadline since it was blocked.
+	   Give it its new deadline based on the latency value. */
+	inf->prevddln = time; 
+	inf->deadline = time + inf->latency;
+	inf->remain   = inf->slice;
+        if(inf->remain > 0)
+            sdom->state = TASK_RUNNING;
+        else
+            sdom->state = ATROPOS_TASK_WAIT;
+        
+    } else {
+	/* We leave REMAIN intact, but put this domain on the WAIT
+	   queue marked as recently unblocked.  It will be given
+	   priority over other domains on the wait queue while
+	   REMAIN>0 in a generous attempt to help it make up for its
+	   own foolishness. */
+	if(inf->remain > 0)
+            sdom->state = ATROPOS_TASK_UNBLOCKED;
+        else
+            sdom->state = ATROPOS_TASK_WAIT;
+    }
+
+    requeue(sdom);
+
+}
+
+/**
+ * ATROPOS - main scheduler function
+ */
+task_slice_t ksched_scheduler(s_time_t time)
+{
+    struct task_struct	*cur_sdom = current;  /* Current sdom           */
+    s_time_t     newtime;
+    s_time_t      ranfor;	        /* How long the domain ran      */
+    struct task_struct	*sdom;	        /* tmp. scheduling domain	*/
+    int   reason;                       /* reason for reschedule        */
+    int cpu = cur_sdom->processor;      /* current CPU                  */
+    struct at_dom_info *cur_info;
+    static unsigned long waitq_rrobin = 0;
+    int i;
+    task_slice_t ret;
+
+    cur_info = DOM_INFO(cur_sdom);
+
+    ASSERT( cur_sdom != NULL);
+
+    /* If we were spinning in the idle loop, there is no current
+     * domain to deschedule. */
+    if (is_idle_task(cur_sdom)) {
+	goto deschedule_done;
+    }
+
+    /*****************************
+     * 
+     * Deschedule the current scheduling domain
+     *
+     ****************************/
+
+   /* Record the time the domain was preempted and for how long it
+       ran.  Work out if the domain is going to be blocked to save
+       some pointless queue shuffling */
+    cur_sdom->lastdeschd = time;
+
+    ranfor = (time - cur_sdom->lastschd);
+
+    dequeue(cur_sdom);
+
+    if ((cur_sdom->state == TASK_RUNNING) ||
+        (cur_sdom->state == ATROPOS_TASK_UNBLOCKED)) {
+
+	/* In this block, we are doing accounting for an sdom which has 
+	   been running in contracted time.  Note that this could now happen
+	   even if the domain is on the wait queue (i.e. if it blocked) */
+
+	/* Deduct guaranteed time from the domain */
+	cur_info->remain  -= ranfor;
+
+	/* If guaranteed time has run out... */
+	if ( cur_info->remain <= 0 ) {
+	    /* Move domain to correct position in WAIT queue */
+            /* XXX sdom_unblocked doesn't need this since it is 
+	     already in the correct place. */
+	    cur_sdom->state = ATROPOS_TASK_WAIT;
+	}
+    }
+
+    requeue(cur_sdom);
+
+  deschedule_done:
+
+    /*****************************
+     * 
+     * We have now successfully descheduled the current sdom.
+     * The next task is the allocate CPU time to any sdom it is due to.
+     *
+       ****************************/
+    cur_sdom = NULL;
+
+    /*****************************
+     * 
+     * Allocate CPU time to any waiting domains who have passed their
+     * period deadline.  If necessary, move them to run queue.
+     *
+     ****************************/
+    while(!list_empty(WAITQ(cpu)) && 
+	  DOM_INFO(sdom = waitq_el(WAITQ(cpu)->next))->deadline <= time ) {
+
+	struct at_dom_info *inf = DOM_INFO(sdom);
+
+        dequeue(sdom);
+
+	/* Domain begins a new period and receives a slice of CPU 
+	 * If this domain has been blocking then throw away the
+	 * rest of it's remain - it can't be trusted */
+	if (inf->remain > 0) 
+	    inf->remain = inf->slice;
+    	else 
+	    inf->remain += inf->slice;
+	inf->prevddln = inf->deadline;
+	inf->deadline += inf->period;
+        if(inf->remain > 0)
+            sdom->state = TASK_RUNNING;
+        else
+            sdom->state = ATROPOS_TASK_WAIT;
+
+	/* Place on the appropriate queue */
+	requeue(sdom);
+    }
+
+    /*****************************
+     * 
+     * Next we need to pick an sdom to run.
+     * If anything is actually 'runnable', we run that. 
+     * If nothing is, we pick a waiting sdom to run optimistically.
+     * If there aren't even any of those, we have to spin waiting for an
+     * event or a suitable time condition to happen.
+     *
+     ****************************/
+    
+    /* we guarantee there's always something on the runqueue */
+    cur_sdom = list_entry(RUNQ(cpu)->next,
+                          struct task_struct, run_list);
+
+    cur_info = DOM_INFO(cur_sdom);
+    newtime = time + cur_info->remain;
+    reason  = (cur_info->prevddln > cur_sdom->lastschd) ?
+      Activation_Reason_Allocated : Activation_Reason_Preempted;
+
+    /* MAW - the idle domain is always on the run queue.  We run from the
+     * runqueue if it's NOT the idle domain or if there's nothing on the wait
+     * queue */
+    if (cur_sdom->domain == IDLE_DOMAIN_ID && !list_empty(WAITQ(cpu))) {
+
+        struct list_head *item;
+
+	/* Try running a domain on the WAIT queue - this part of the
+	   scheduler isn't particularly efficient but then again, we
+	   don't have any guaranteed domains to worry about. */
+	
+	/* See if there are any unblocked domains on the WAIT
+	   queue who we can give preferential treatment to. */
+        list_for_each(item, WAITQ(cpu))
+        {
+            struct at_dom_info *inf =
+                list_entry(item, struct at_dom_info, waitq);
+
+            sdom = inf->owner;
+            
+	    if (sdom->state == ATROPOS_TASK_UNBLOCKED) {
+		cur_sdom = sdom;
+		cur_info  = inf;
+		newtime  = time + inf->remain;
+		reason   = Activation_Reason_Preempted;
+		goto found;
+	    }
+	}
+
+        /* init values needed to approximate round-robin for slack time */
+        i = 0;
+        if ( waitq_rrobin >= q_len(WAITQ(cpu)))
+            waitq_rrobin = 0;
+        
+	/* Last chance: pick a domain on the wait queue with the XTRA
+	   flag set.  The NEXT_OPTM field is used to cheaply achieve
+	   an approximation of round-robin order */
+        list_for_each(item, WAITQ(cpu))
+            {
+                struct at_dom_info *inf =
+                    list_entry(item, struct at_dom_info, waitq);
+                
+                sdom = inf->owner;
+
+                if (inf->xtratime && i >= waitq_rrobin) {
+                    cur_sdom = sdom;
+                    cur_info  = inf;
+                    newtime = time + BESTEFFORT_QUANTUM;
+                    reason  = Activation_Reason_Extra;
+                    waitq_rrobin = i + 1; /* set this value ready for next */
+                    goto found;
+                }
+
+                i++;
+            }
+
+    }
+
+    found:
+    /**********************
+     * 
+     * We now have to work out the time when we next need to
+     * make a scheduling decision.  We set the alarm timer
+     * to cause an interrupt at that time.
+     *
+     **********************/
+
+#define MIN(x,y) ( ( x < y ) ? x : y )
+#define MAX(x,y) ( ( x > y ) ? x : y )
+
+    /* If we might be able to run a waiting domain before this one has */
+    /* exhausted its time, cut short the time allocation */
+    if (!list_empty(WAITQ(cpu)))
+    {
+	newtime = MIN(newtime,
+                      DOM_INFO(waitq_el(WAITQ(cpu)->next))->deadline);
+    }
+
+    /* don't allow pointlessly small time slices */
+    newtime = MAX(newtime, time + BESTEFFORT_QUANTUM);
+    
+    ret.task = cur_sdom;
+    ret.time = newtime - time;
+
+    cur_sdom->min_slice = newtime - time;
+    DOM_INFO(cur_sdom)->reason = reason;
+
+    TRACE_2D(0, cur_sdom->domain >> 32, (u32)cur_sdom->domain);
+ 
+    return ret;
+}
+
+
+/* set up per-CPU state and the dom info cache: 0 on success, -1 on failure */
+static int at_init_scheduler(void)
+{
+    int i;
+    
+    for( i = 0; i < NR_CPUS; i++)
+    {
+        if( (CPU_INF(schedule_data[i]) = kmalloc(sizeof(struct at_cpu_info),
+                                            GFP_KERNEL)) == NULL )
+            return -1;
+        WAITQ(i)->next = WAITQ(i);
+        WAITQ(i)->prev = WAITQ(i);
+    }
+
+    dom_info_cache = kmem_cache_create("Atropos dom info",
+                                       sizeof(struct at_dom_info),
+                                       0, 0, NULL, NULL);
+    /* at_alloc_task() allocates from this cache, so a NULL cache is fatal */
+    return ( dom_info_cache == NULL ) ? -1 : 0;
+}
+
+/* report the lengths of this CPU's wait and run queues in a runq dump */
+static void at_dump_cpu_state(int cpu)
+{
+    int waitq_len = q_len(WAITQ(cpu));
+    int runq_len  = q_len(RUNQ(cpu));
+    printk("Waitq len: %d Runq len: %d ", waitq_len, runq_len);
+}
+
+/* print the Atropos-specific fields of one domain for a run queue dump */
+static void at_dump_runq_el(struct task_struct *p)
+{
+    struct at_dom_info *inf = DOM_INFO(p);
+    printk("lastschd = %llu, xtratime = %d ", p->lastschd, inf->xtratime);
+}
+
+
+/* copy scheduling parameters into (PUT) or out of (GET) a domain; returns 0 */
+static int at_adjdom(struct task_struct *p, struct sched_adjdom_cmd *cmd)
+{
+    struct at_dom_info *inf = DOM_INFO(p);
+    switch ( cmd->direction )
+    {
+    case SCHED_INFO_PUT:
+        inf->period   = cmd->u.atropos.period;
+        inf->slice    = cmd->u.atropos.slice;
+        inf->latency  = cmd->u.atropos.latency;
+        inf->xtratime = !!cmd->u.atropos.xtratime;
+        break;
+    case SCHED_INFO_GET:
+        cmd->u.atropos.period   = inf->period;
+        cmd->u.atropos.slice    = inf->slice;
+        cmd->u.atropos.latency  = inf->latency;
+        cmd->u.atropos.xtratime = inf->xtratime;
+        break;
+    }
+    return 0;
+}
+
+
+/** at_alloc_task - allocate and zero a task's private info; 0 or -1 */
+static int at_alloc_task(struct task_struct *p)
+{
+    ASSERT(p != NULL);
+
+    DOM_INFO(p) = kmem_cache_alloc(dom_info_cache, GFP_KERNEL);
+    if ( DOM_INFO(p) == NULL )
+        return -1;
+
+    if ( p->domain == IDLE_DOMAIN_ID )
+        printk("ALLOC IDLE ON CPU %d\n", p->processor);
+
+    memset(DOM_INFO(p), 0, sizeof(struct at_dom_info));
+    return 0;
+}
+
+
+/* hand a task's private scheduler info back to dom_info_cache */
+static void at_free_task(struct task_struct *p)
+{
+    kmem_cache_free(dom_info_cache, DOM_INFO(p));
+}
+
+/* print the name of an Atropos-private task state;
+ * returns 0 if the state was recognised and printed, -1 otherwise */
+static int at_prn_state(int state)
+{
+    if ( state == ATROPOS_TASK_UNBLOCKED )
+    {
+        printk("Unblocked");
+        return 0;
+    }
+
+    if ( state == ATROPOS_TASK_WAIT )
+    {
+        printk("Wait");
+        return 0;
+    }
+
+    /* not one of this scheduler's private states */
+    return -1;
+}
+    
+
+struct scheduler sched_atropos_def = {
+    .name           = "Atropos Soft Real Time Scheduler",
+    .opt_name       = "atropos",
+    .sched_id       = SCHED_ATROPOS,
+    /* hooks not listed (rem_task, do_block, control, dump_settings) stay NULL */
+    .init_scheduler = at_init_scheduler,
+    .alloc_task     = at_alloc_task,
+    .add_task       = at_add_task,
+    .free_task      = at_free_task,
+    .wake_up        = unblock,
+    .do_schedule    = ksched_scheduler,
+    .adjdom         = at_adjdom,
+    .dump_cpu_state = at_dump_cpu_state,
+    .dump_runq_el   = at_dump_runq_el,
+    .prn_state      = at_prn_state,
+};
diff --git a/xen/common/sched_bvt.c b/xen/common/sched_bvt.c
index 3052d1e98c..4e77d58948 100644
--- a/xen/common/sched_bvt.c
+++ b/xen/common/sched_bvt.c
@@ -151,6 +151,7 @@ void bvt_wake_up(struct task_struct *p)
     struct bvt_dom_info *inf = BVT_INFO(p);
 
     ASSERT(inf != NULL);
+    
 
     /* set the BVT parameters */
     if (inf->avt < CPU_SVT(p->processor))
@@ -166,19 +167,25 @@ void bvt_wake_up(struct task_struct *p)
 /* 
  * Block the currently-executing domain until a pertinent event occurs.
  */
-static long bvt_do_block(struct task_struct *p)
+static void bvt_do_block(struct task_struct *p)
 {
     BVT_INFO(p)->warpback = 0; 
-    return 0;
 }
 
 /* Control the scheduler. */
 int bvt_ctl(struct sched_ctl_cmd *cmd)
 {
     struct bvt_ctl *params = &cmd->u.bvt;
-    
-    ctx_allow = params->ctx_allow;
 
+    if ( cmd->direction == SCHED_INFO_PUT )
+    { 
+        ctx_allow = params->ctx_allow;
+    }
+    else
+    {
+        params->ctx_allow = ctx_allow;
+    }
+    
     return 0;
 }
 
@@ -187,24 +194,40 @@ int bvt_adjdom(struct task_struct *p,
                struct sched_adjdom_cmd *cmd)
 {
     struct bvt_adjdom *params = &cmd->u.bvt;
-    unsigned long mcu_adv = params->mcu_adv,
-                    warp  = params->warp,
-                    warpl = params->warpl,
-                    warpu = params->warpu;
-    
-    struct bvt_dom_info *inf = BVT_INFO(p);
-
-    /* Sanity -- this can avoid divide-by-zero. */
-    if ( mcu_adv == 0 )
-        return -EINVAL;
-
-    spin_lock_irq(&schedule_lock[p->processor]);   
-    inf->mcu_advance = mcu_adv;
-    inf->warp = warp;
-    inf->warpl = warpl;
-    inf->warpu = warpu;
-    spin_unlock_irq(&schedule_lock[p->processor]); 
+    unsigned long flags;
 
+    if ( cmd->direction == SCHED_INFO_PUT )
+    {
+        unsigned long mcu_adv = params->mcu_adv,
+            warp  = params->warp,
+            warpl = params->warpl,
+            warpu = params->warpu;
+        
+        struct bvt_dom_info *inf = BVT_INFO(p);
+        
+        /* Sanity -- this can avoid divide-by-zero. */
+        if ( mcu_adv == 0 )
+            return -EINVAL;
+        
+        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
+        inf->mcu_advance = mcu_adv;
+        inf->warp = warp;
+        inf->warpl = warpl;
+        inf->warpu = warpu;
+        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
+    }
+    else if ( cmd->direction == SCHED_INFO_GET )
+    {
+        struct bvt_dom_info *inf = BVT_INFO(p);
+
+        spin_lock_irqsave(&schedule_lock[p->processor], flags);   
+        params->mcu_adv = inf->mcu_advance;
+        params->warp    = inf->warp;
+        params->warpl   = inf->warpl;
+        params->warpu   = inf->warpu;
+        spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
+    }
+    
     return 0;
 }
 
diff --git a/xen/common/sched_rrobin.c b/xen/common/sched_rrobin.c
index 544803f1fa..73d73bf899 100644
--- a/xen/common/sched_rrobin.c
+++ b/xen/common/sched_rrobin.c
@@ -1,5 +1,5 @@
 /****************************************************************************
- * Very stupid Round Robin Scheduler for Xen
+ * Round Robin Scheduler for Xen
  *
  * by Mark Williamson (C) 2004 Intel Research Cambridge
  */
@@ -33,7 +33,15 @@ static task_slice_t rr_do_schedule(s_time_t now)
 
 static int rr_ctl(struct sched_ctl_cmd *cmd)
 {
-    rr_slice = cmd->u.rrobin.slice;
+    if(cmd->direction == SCHED_INFO_PUT)
+    {
+        rr_slice = cmd->u.rrobin.slice;
+    }
+    else /* cmd->direction == SCHED_INFO_GET */
+    {
+        cmd->u.rrobin.slice = rr_slice;
+    }
+    
     return 0;
 }
 
diff --git a/xen/common/schedule.c b/xen/common/schedule.c
index 7b06d3a109..496b35b9a8 100644
--- a/xen/common/schedule.c
+++ b/xen/common/schedule.c
@@ -41,7 +41,7 @@
 #define TIME_SLOP      (s32)MICROSECS(50)     /* allow time to slip a bit */
 
 /*
- * XXX Pull trace-related #defines out of here and into an auto-generated
+ * TODO MAW pull trace-related #defines out of here and into an auto-generated
  * header file later on!
  */
 #define TRC_SCHED_DOM_ADD             0x00010000
@@ -68,23 +68,25 @@ static void t_timer_fn(unsigned long unused);
 static void dom_timer_fn(unsigned long data);
 static void fallback_timer_fn(unsigned long unused);
 
-/* This is global for now so that private implementations can reach it. */
+/* This is global for now so that private implementations can reach it */
 schedule_data_t schedule_data[NR_CPUS];
 
 /*
- * XXX It would be nice if the schedulers array could get populated
+ * TODO: It would be nice if the schedulers array could get populated
  * automagically without having to hack the code in here.
  */
-extern struct scheduler sched_bvt_def, sched_rrobin_def;
+extern struct scheduler sched_bvt_def, sched_rrobin_def, sched_atropos_def;
 static struct scheduler *schedulers[] = { &sched_bvt_def,
                                           &sched_rrobin_def,
+                                          &sched_atropos_def,
                                           NULL};
 
 /* Operations for the current scheduler. */
 static struct scheduler ops;
 
-#define SCHED_FN(fn, ...) \
-    ((ops.fn != NULL) ? (ops.fn(__VA_ARGS__)) : (typeof(ops.fn(__VA_ARGS__)))0)
+#define SCHED_OP(fn, ...)                                 \
+         (( ops.fn != NULL ) ? ops.fn( __VA_ARGS__ )      \
+          : (typeof(ops.fn(__VA_ARGS__)))0 )
 
 spinlock_t schedule_lock[NR_CPUS] __cacheline_aligned;
 
@@ -101,7 +103,7 @@ extern kmem_cache_t *task_struct_cachep;
 
 void free_task_struct(struct task_struct *p)
 {
-    SCHED_FN(free_task, p);
+    SCHED_OP(free_task, p);
     kmem_cache_free(task_struct_cachep, p);
 }
 
@@ -114,15 +116,15 @@ struct task_struct *alloc_task_struct(void)
 
     if ( (p = kmem_cache_alloc(task_struct_cachep,GFP_KERNEL)) == NULL )
         return NULL;
+    
+    memset(p, 0, sizeof(*p));
 
-    memset(p, 0, sizeof(*p));    
-
-    if ( SCHED_FN(alloc_task, p) < 0)
+    if ( SCHED_OP(alloc_task, p) < 0 )
     {
-        kmem_cache_free(task_struct_cachep, p);
+        kmem_cache_free(task_struct_cachep,p);
         return NULL;
     }
-    
+
     return p;
 }
 
@@ -146,7 +148,7 @@ void sched_add_domain(struct task_struct *p)
         schedule_data[p->processor].idle = p;
     }
 
-    SCHED_FN(add_task, p);
+    SCHED_OP(add_task, p);
 
     TRACE_3D(TRC_SCHED_DOM_ADD, _HIGH32(p->domain), _LOW32(p->domain), p);
 }
@@ -160,7 +162,7 @@ int sched_rem_domain(struct task_struct *p)
 
     rem_ac_timer(&p->timer);
 
-    SCHED_FN(rem_task, p);
+    SCHED_OP(rem_task, p);
 
     TRACE_3D(TRC_SCHED_DOM_REM, _HIGH32(p->domain), _LOW32(p->domain), p);
 
@@ -173,9 +175,9 @@ void init_idle_task(void)
     unsigned long flags;
     struct task_struct *p = current;
 
-    if ( SCHED_FN(alloc_task, p) < 0 )
-        panic("Failed to allocate scheduler private data for idle task");
-    SCHED_FN(add_task, p);
+    if ( SCHED_OP(alloc_task, p) < 0)
+		panic("Failed to allocate scheduler private data for idle task");
+    SCHED_OP(add_task, p);
 
     spin_lock_irqsave(&schedule_lock[p->processor], flags);
     p->has_cpu = 1;
@@ -191,12 +193,12 @@ void __wake_up(struct task_struct *p)
 
     ASSERT(p->state != TASK_DYING);
 
-    if ( unlikely(__task_on_runqueue(p)) )
+    if ( unlikely(__task_on_runqueue(p)) )        
         return;
 
     p->state = TASK_RUNNING;
 
-    SCHED_FN(wake_up, p);
+    SCHED_OP(wake_up, p);
 
 #ifdef WAKEUP_HISTO
     p->wokenup = NOW();
@@ -300,15 +302,12 @@ long do_set_timer_op(unsigned long timeout_hi, unsigned long timeout_lo)
     return 0;
 }
 
+/** sched_id - return the sched_id field of the scheduler currently in use */
+int sched_id(void)
+{
+    return ops.sched_id;
+}
 
-/**
- * sched_ctl - dispatch a scheduler control operation
- * @cmd:       the command passed in the dom0 op
- *
- * Given a generic scheduler control operation, call the control function for
- * the scheduler in use, passing the appropriate control information from the
- * union supplied.
- */
 long sched_ctl(struct sched_ctl_cmd *cmd)
 {
     TRACE_0D(TRC_SCHED_CTL);
@@ -316,7 +315,7 @@ long sched_ctl(struct sched_ctl_cmd *cmd)
     if ( cmd->sched_id != ops.sched_id )
         return -EINVAL;
 
-    return SCHED_FN(control, cmd);
+    return SCHED_OP(control, cmd);
 }
 
 
@@ -328,6 +327,9 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
     if ( cmd->sched_id != ops.sched_id )
         return -EINVAL;
 
+    if ( cmd->direction != SCHED_INFO_PUT && cmd->direction != SCHED_INFO_GET )
+        return -EINVAL;
+
     p = find_domain_by_id(cmd->domain);
 
     if( p == NULL )
@@ -335,7 +337,7 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
 
     TRACE_2D(TRC_SCHED_ADJDOM, _HIGH32(p->domain), _LOW32(p->domain));
 
-    SCHED_FN(adjdom, p, cmd);
+    SCHED_OP(adjdom, p, cmd);
 
     put_task_struct(p); 
     return 0;
@@ -351,7 +353,7 @@ long sched_adjdom(struct sched_adjdom_cmd *cmd)
  */
 unsigned long __reschedule(struct task_struct *p)
 {
-    int cpu = p->processor;
+       int cpu = p->processor;
     struct task_struct *curr;
     s_time_t now, min_time;
 
@@ -376,7 +378,7 @@ unsigned long __reschedule(struct task_struct *p)
     if ( schedule_data[cpu].s_timer.expires > min_time + TIME_SLOP )
         mod_ac_timer(&schedule_data[cpu].s_timer, min_time);
 
-    return SCHED_FN(reschedule, p);
+    return SCHED_OP(reschedule, p);
 }
 
 void reschedule(struct task_struct *p)
@@ -385,6 +387,7 @@ void reschedule(struct task_struct *p)
 
     spin_lock_irqsave(&schedule_lock[p->processor], flags);
     cpu_mask = __reschedule(p);
+
     spin_unlock_irqrestore(&schedule_lock[p->processor], flags);
 
 #ifdef CONFIG_SMP
@@ -420,7 +423,6 @@ asmlinkage void __enter_scheduler(void)
     ASSERT(!in_interrupt());
     ASSERT(__task_on_runqueue(prev));
     ASSERT(prev->state != TASK_UNINTERRUPTIBLE);
-    ASSERT(prev != NULL);
 
     if ( prev->state == TASK_INTERRUPTIBLE )
     {
@@ -428,19 +430,16 @@ asmlinkage void __enter_scheduler(void)
         if ( signal_pending(prev) )
             prev->state = TASK_RUNNING;
         else
-            SCHED_FN(do_block, prev);
+            SCHED_OP(do_block, prev);
     }
 
+    prev->cpu_time += now - prev->lastschd;
+
     /* get policy-specific decision on scheduling... */
     next_slice = ops.do_schedule(now);
 
     r_time = next_slice.time;
-    next   = next_slice.task;
-
-    if ( likely(!is_idle_task(prev)) ) 
-        prev->cpu_time += (now - prev->lastschd);
-
-    /* now, switch to the new task... */
+    next = next_slice.task;
 
     prev->has_cpu = 0;
     next->has_cpu = 1;
@@ -484,8 +483,6 @@ asmlinkage void __enter_scheduler(void)
 
     TRACE_2D(TRC_SCHED_SWITCH, next->domain, next);
 
-    ASSERT(next->processor == current->processor);
-
     switch_to(prev, next);
     
     if ( unlikely(prev->state == TASK_DYING) ) 
@@ -520,7 +517,6 @@ int idle_cpu(int cpu)
 static void s_timer_fn(unsigned long unused)
 {
     TRACE_0D(TRC_SCHED_S_TIMER_FN);
-    
     set_bit(_HYP_EVENT_NEED_RESCHED, &current->hyp_events);
     perfc_incrc(sched_irq);
 }
@@ -532,6 +528,8 @@ static void t_timer_fn(unsigned long unused)
 
     TRACE_0D(TRC_SCHED_T_TIMER_FN);
 
+    TRACE_0D(TRC_SCHED_T_TIMER_FN);
+
     if ( !is_idle_task(p) )
         send_guest_virq(p, VIRQ_TIMER);
 
@@ -611,10 +609,8 @@ void __init scheduler_init(void)
     if ( ops.do_schedule == NULL)
         panic("Chosen scheduler has NULL do_schedule!");
 
-    if ( SCHED_FN(init_scheduler) < 0 )
+    if ( SCHED_OP(init_scheduler) < 0 )
         panic("Initialising scheduler failed!");
-
-    SCHED_FN(add_task, &idle0_task);
 }
 
 /*
@@ -654,7 +650,7 @@ static void dump_rqueue(struct list_head *queue, char *name)
     list_for_each (list, queue) {
         p = list_entry(list, struct task_struct, run_list);
         printk("%3d: %llu has=%c ", loop++, p->domain, p->has_cpu ? 'T':'F');
-        SCHED_FN(dump_runq_el, p);
+        SCHED_OP(dump_runq_el, p);
         printk("c=0x%X%08X\n", (u32)(p->cpu_time>>32), (u32)p->cpu_time);
         printk("         l: %lx n: %lx  p: %lx\n",
                (unsigned long)list, (unsigned long)list->next,
@@ -670,18 +666,48 @@ void dump_runq(u_char key, void *dev_id, struct pt_regs *regs)
     int i;
 
     printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
-    SCHED_FN(dump_settings);
+    SCHED_OP(dump_settings);
     printk("NOW=0x%08X%08X\n",  (u32)(now>>32), (u32)now); 
     for (i = 0; i < smp_num_cpus; i++) {
         spin_lock_irqsave(&schedule_lock[i], flags);
         printk("CPU[%02d] ", i);
-        SCHED_FN(dump_cpu_state,i);
+        SCHED_OP(dump_cpu_state,i);
         dump_rqueue(&schedule_data[i].runqueue, "rq"); 
         spin_unlock_irqrestore(&schedule_lock[i], flags);
     }
     return; 
 }
 
+/* print a readable name for "state"; "Unknown" if nobody recognises it */
+void sched_prn_state(int state)
+{
+    int ret = 0;
+    switch(state)
+    {
+    case TASK_RUNNING:
+        printk("Running");
+        break;
+    case TASK_INTERRUPTIBLE:
+        printk("Int sleep");
+        break;
+    case TASK_UNINTERRUPTIBLE:
+        printk("UInt sleep");
+        break;
+    case TASK_STOPPED:
+        printk("Stopped");
+        break;
+    case TASK_DYING:
+        printk("Dying");
+        break;
+    default:
+        /* SCHED_OP() would yield 0 (success) when the hook is unset */
+        ret = ( ops.prn_state != NULL ) ? ops.prn_state(state) : -1;
+    }
+
+    if ( ret != 0 )
+        printk("Unknown");
+}
+
 #if defined(WAKEUP_HISTO) || defined(BLOCKTIME_HISTO)
 void print_sched_histo(u_char key, void *dev_id, struct pt_regs *regs)
 {
diff --git a/xen/include/hypervisor-ifs/dom0_ops.h b/xen/include/hypervisor-ifs/dom0_ops.h
index ce748d5d31..251f4853a4 100644
--- a/xen/include/hypervisor-ifs/dom0_ops.h
+++ b/xen/include/hypervisor-ifs/dom0_ops.h
@@ -18,7 +18,7 @@
  * This makes sure that old versions of dom0 tools will stop working in a
  * well-defined way (rather than crashing the machine, for instance).
  */
-#define DOM0_INTERFACE_VERSION   0xAAAA000A
+#define DOM0_INTERFACE_VERSION   0xAAAA000B
 
 #define MAX_CMD_LEN       256
 #define MAX_DOMAIN_NAME    16
@@ -96,7 +96,6 @@ typedef struct dom0_getdomaininfo_st
 #define DOMSTATE_STOPPED             1
     int state;
     int hyp_events;
-    unsigned long mcu_advance;
     unsigned int tot_pages;
     long long cpu_time;
     unsigned long shared_info_frame;  /* MFN of shared_info struct */
@@ -214,6 +213,16 @@ typedef struct dom0_pcidev_access_st
     int          enable;
 } dom0_pcidev_access_t;
 
+/*
+ * Get the ID of the current scheduler.
+ */
+#define DOM0_SCHED_ID        24
+typedef struct dom0_sched_id_st
+{
+    /* OUT: scheduler ID; presumably a SCHED_* value from sched_ctl.h - confirm */
+    int sched_id;
+} dom0_sched_id_t;
+
 typedef struct dom0_op_st
 {
     unsigned long cmd;
@@ -239,6 +248,7 @@ typedef struct dom0_op_st
         dom0_gettbufs_t         gettbufs;
         dom0_physinfo_t         physinfo;
         dom0_pcidev_access_t    pcidev_access;
+        dom0_sched_id_t         sched_id;
     } u;
 } dom0_op_t;
 
diff --git a/xen/include/hypervisor-ifs/sched_ctl.h b/xen/include/hypervisor-ifs/sched_ctl.h
index bd9e9d082c..a2e57c2b7c 100644
--- a/xen/include/hypervisor-ifs/sched_ctl.h
+++ b/xen/include/hypervisor-ifs/sched_ctl.h
@@ -7,18 +7,24 @@
 #ifndef __SCHED_CTL_H__
 #define __SCHED_CTL_H__
 
-/* Scheduler types. */
+/* Scheduler types */
 #define SCHED_BVT      0
 #define SCHED_ATROPOS  1
 #define SCHED_RROBIN   2
 
+/* these describe the intended direction used for a scheduler control or domain
+ * command */
+#define SCHED_INFO_PUT 0
+#define SCHED_INFO_GET 1
+
 /*
- * Generic scheduler control command: union of all scheduler control command
- * structures.
+ * Generic scheduler control command - used to adjust system-wide scheduler
+ * parameters
  */
 struct sched_ctl_cmd
 {
     unsigned int sched_id;
+    int direction;          /* are we getting or putting settings? */
     
     union
     {
@@ -40,6 +46,7 @@ struct sched_adjdom_cmd
 {
     unsigned int sched_id;
     domid_t domain;
+    int direction;          /* are we getting or putting settings? */
     
     union
     {
@@ -53,6 +60,9 @@ struct sched_adjdom_cmd
 
         struct atropos_adjdom
         {
+            u64 period;
+            u64 slice;
+            u64 latency;
             int xtratime;
         } atropos;
     } u;
diff --git a/xen/include/xen/sched-if.h b/xen/include/xen/sched-if.h
index 683e73d4f6..7e55f46b91 100644
--- a/xen/include/xen/sched-if.h
+++ b/xen/include/xen/sched-if.h
@@ -40,8 +40,7 @@ struct scheduler
     void         (*free_task)      (struct task_struct *);
     void         (*rem_task)       (struct task_struct *);
     void         (*wake_up)        (struct task_struct *);
-    /* XXX why does do_block need to return anything at all? */
-    long         (*do_block)       (struct task_struct *);
+    void         (*do_block)       (struct task_struct *);
     task_slice_t (*do_schedule)    (s_time_t);
     int          (*control)        (struct sched_ctl_cmd *);
     int          (*adjdom)         (struct task_struct *,
@@ -50,6 +49,7 @@ struct scheduler
     void         (*dump_settings)  (void);
     void         (*dump_cpu_state) (int);
     void         (*dump_runq_el)   (struct task_struct *);
+    int          (*prn_state)      (int);
 };
 
 /* per CPU scheduler information */
diff --git a/xen/include/xen/sched.h b/xen/include/xen/sched.h
index c12ac2ca5e..1b8bd10d3c 100644
--- a/xen/include/xen/sched.h
+++ b/xen/include/xen/sched.h
@@ -1,6 +1,9 @@
 #ifndef _LINUX_SCHED_H
 #define _LINUX_SCHED_H
 
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/spinlock.h>
 #include <xen/config.h>
 #include <xen/types.h>
 #include <xen/spinlock.h>
@@ -266,6 +269,7 @@ void sched_add_domain(struct task_struct *p);
 int  sched_rem_domain(struct task_struct *p);
 long sched_ctl(struct sched_ctl_cmd *);
 long sched_adjdom(struct sched_adjdom_cmd *);
+int  sched_id(void);
 void init_idle_task(void);
 void __wake_up(struct task_struct *p);
 void wake_up(struct task_struct *p);
@@ -302,6 +306,7 @@ void startup_cpu_idle_loop(void);
 void continue_cpu_idle_loop(void);
 
 void continue_nonidle_task(void);
+void sched_prn_state(int state);
 
 /* This task_hash and task_list are protected by the tasklist_lock. */
 #define TASK_HASH_SIZE 256
-- 
2.30.2